import pandas as pd
import numpy as np
import altair as alt
import geopandas as gpd
import hvplot.pandas
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import datetime
import math
from pandana.loaders import osm
import folium
Unveiling the Dynamics of Pedestrian Movement in Center City, Philadelphia
Xiayuanshan Gao
Overview
In this comprehensive study of pedestrian movement in Center City, Philadelphia, I’ve dissected walking patterns through a multi-faceted lens to understand the underpinnings of foot traffic dynamics in 2012. Initially, I examined the types of walking trips and the socioeconomic profiles of pedestrians, revealing a preference for walking across a spectrum of income brackets, age groups, and educational levels. Subsequently, I mapped the geography of pedestrian travel, highlighting the central district as the focal point of walking activities, enriched by its transit hubs and amenities. Delving into what attracts people to walk, I uncovered that restaurants, schools, commercial blocks, and green spaces play pivotal roles in drawing pedestrians. Lastly, I employed a regression model to quantify the influence of urban features on pedestrian movement, confirming the importance of amenities like restaurants and public transit in promoting walkability.
1. Profiling the Walking Trips and Pedestrians in Philadelphia in 2012
In the first section, I delve into the pedestrian dynamics of Philadelphia in 2012, aiming to unpack the walking trip patterns and profile those who most frequently travel by foot. My exploration spans various dimensions, from the origins and destinations of these journeys to the walkers’ socioeconomic characteristics, including income levels, vehicle ownership, gender, age, race, and education. This comprehensive overview not only illuminates the prevalent trends in walking trips but also highlights the diverse tapestry of individuals who opt for pedestrian travel, thereby offering an encompassing snapshot of urban mobility within the city.
# Load the trip records from the Excel workbook into a DataFrame.
trip = pd.read_excel('data/4_Trip_Public.xlsx')
trip.head ()
| ID | HH_ID | PERSON_NUM | PERSON_ID | HH_WEIGHT | P_WEIGHT | TOUR_NUM | WKSUB_NUM | WKSUB_ID | TRIP_NUM | ... | D_COUNTY | D_CPA | D_MCD | D_TRACT | D_TAZ | GPSFactor | Survey_TravTime | Model_TravTime | Model_TravDist | CompositeWeight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 100140 | 1 | 10014001 | 133.460324 | 132.54307 | 1.0 | NaN | 1.001400e+11 | 1 | ... | 34007.0 | 007_802 | 3.400712e+09 | 3.400760e+10 | 22228.0 | 1.12 | 15.0 | 7.456654 | 1.535654 | 148.448239 |
| 1 | 2 | 100140 | 1 | 10014001 | 133.460324 | 132.54307 | 1.0 | NaN | 1.001400e+11 | 2 | ... | 34007.0 | 007_802 | 3.400712e+09 | 3.400760e+10 | 22231.0 | 1.12 | 7.0 | 7.810088 | 1.675524 | 148.448239 |
| 2 | 3 | 100140 | 1 | 10014001 | 133.460324 | 132.54307 | 1.0 | NaN | 1.001400e+11 | 3 | ... | 34005.0 | 005_702 | 3.400544e+09 | 3.400570e+10 | 20225.0 | 1.12 | 12.0 | 9.363016 | 2.301180 | 148.448239 |
| 3 | 4 | 100140 | 1 | 10014001 | 133.460324 | 132.54307 | 1.0 | NaN | 1.001400e+11 | 4 | ... | 34005.0 | 005_702 | 3.400544e+09 | 3.400570e+10 | 20227.0 | 1.12 | 16.0 | 6.233144 | 1.086251 | 148.448239 |
| 4 | 5 | 100140 | 1 | 10014001 | 133.460324 | 132.54307 | 1.0 | NaN | 1.001400e+11 | 5 | ... | 34005.0 | 005_702 | 3.400544e+09 | 3.400570e+10 | 20225.0 | 1.12 | 10.0 | 6.258231 | 1.208349 | 148.448239 |
5 rows × 82 columns
# Keep only trips that start in Philadelphia County (O_COUNTY == 42101, i.e.
# state FIPS 42 + county FIPS 101) and whose aggregated mode code is 1, which
# the rest of this analysis treats as walking.
# NOTE(review): confirm the MODE_AGG coding against the survey codebook.
# .copy() makes `walk` an independent DataFrame, so the column assignments
# made further down no longer raise SettingWithCopyWarning.
walk = trip[(trip['O_COUNTY'] == 42101) & (trip['MODE_AGG'] == 1)].copy()
walk.head()
| ID | HH_ID | PERSON_NUM | PERSON_ID | HH_WEIGHT | P_WEIGHT | TOUR_NUM | WKSUB_NUM | WKSUB_ID | TRIP_NUM | ... | D_COUNTY | D_CPA | D_MCD | D_TRACT | D_TAZ | GPSFactor | Survey_TravTime | Model_TravTime | Model_TravDist | CompositeWeight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 12 | 13 | 100206 | 1 | 10020601 | 204.106167 | 281.143358 | 1.0 | NaN | 1.002060e+11 | 3 | ... | 42101.0 | 101_103 | 4.210160e+09 | 4.210100e+10 | 113.0 | 1.17 | 15.0 | 16.783789 | 0.867162 | 328.937729 |
| 13 | 14 | 100206 | 1 | 10020601 | 204.106167 | 281.143358 | 1.0 | NaN | 1.002060e+11 | 4 | ... | 42101.0 | 101_103 | 4.210160e+09 | 4.210100e+10 | 44.0 | 1.17 | 11.0 | 12.350352 | 0.638102 | 328.937729 |
| 302 | 314 | 102372 | 1 | 10237201 | 209.139870 | 218.215946 | 1.0 | NaN | 1.023720e+11 | 4 | ... | 42101.0 | 101_103 | 4.210160e+09 | 4.210100e+10 | 35.0 | 1.13 | 9.0 | NaN | NaN | 246.584019 |
| 303 | 315 | 102372 | 1 | 10237201 | 209.139870 | 218.215946 | 1.0 | NaN | 1.023720e+11 | 5 | ... | 42101.0 | 101_103 | 4.210160e+09 | 4.210100e+10 | 38.0 | 1.13 | 10.0 | 11.381527 | 0.588046 | 246.584019 |
| 991 | 1009 | 109412 | 1 | 10941201 | 193.156252 | 82.416676 | 1.0 | NaN | 1.094120e+11 | 3 | ... | 42101.0 | 101_103 | 4.210160e+09 | 4.210100e+10 | 55.0 | 1.13 | 10.0 | 16.072276 | 0.830401 | 93.130844 |
5 rows × 82 columns
1.1 What Type of Walking Trips Happened in Philadelphia in 2012
From the presented data, it is evident that walking trips in Philadelphia in 2012 were predominantly initiated and concluded at a variety of locations classified under ‘Other’, denoting a diverse range of non-home and non-work-related starting and ending points. This diversity in trip locations correlates with a higher frequency of walking trips associated with home activities that are not related to work, school, or online engagements. Despite the variety in origins and destinations, there is a notable preference for walking trips that end at ‘Home’, suggesting that Philadelphia residents may prefer walking for returning home from various activities. In contrast, walking trips starting from ‘Work’ are less frequent than those ending there, hinting at the possibility that walking is less preferred for commuting to work. The subsequent chart detailing the types of activities associated with walking trips reinforces the inference that non-work-related activities, particularly those centered around the home, are the primary motivators for walking.
The first pair of charts compare the frequency of different types of locations from where people start and end their walking trips. The categories include ‘Other’, ‘Work’, ‘Home’, and ‘School’. For origins, the ‘Other’ category has the highest count, indicating that most walking trips begin at locations other than work, home, or school. This is followed by ‘Work’, ‘Home’, and ‘School’ which has the least count. For destinations, ‘Other’ also has the highest count, suggesting that destinations are similarly varied. However, ‘Home’ has a significantly higher count as a destination than as an origin, while ‘Work’ has a lower count as a destination than as an origin. ‘School’ remains the least frequent destination.
# Translate the numeric location-type codes into readable labels for both
# the origin and the destination column.
replacement_dict = {1: 'Home', 2: 'Work', 3: 'School', 4: 'Other'}
for loc_col in ('O_LOC_TYPE', 'D_LOC_TYPE'):
    walk[loc_col] = walk[loc_col].replace(replacement_dict)

sns.set_style("whitegrid")
fig, ax = plt.subplots(1, 2, figsize=(12, 6))

# One count plot per panel: origins on the left, destinations on the right.
panel_specs = (
    (ax[0], 'O_LOC_TYPE', 'Frequency of Origin Loc Type ', 'Origin Loc Type '),
    (ax[1], 'D_LOC_TYPE', 'Frequency of Destination Loc Type', 'Destination Loc Type'),
)
for panel_ax, loc_column, panel_title, panel_xlabel in panel_specs:
    sns.countplot(x=loc_column, data=walk, ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel(panel_xlabel)
    panel_ax.set_ylabel('Count')

# Tighten the layout before the figure is shown.
plt.tight_layout()
plt.show()
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\1475632616.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
walk['O_LOC_TYPE'] = walk['O_LOC_TYPE'].replace(replacement_dict)
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\1475632616.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
walk['D_LOC_TYPE'] = walk['D_LOC_TYPE'].replace(replacement_dict)

The following chart shows the frequency of different types of activities for which walking is used. The most common activity is ‘Home activities not related to work, school, or online’, indicating that walking is most frequently used for local errands or tasks around the house. This is followed by ‘Work for pay’ and ‘Change type of transportation/transfer’. Activities like ‘Social (visit friends, relatives, etc.)’, ‘Medical (medical appointment, medical procedure, etc.)’, and ‘Recreation-watch/observe (movies, concert, sports event, etc.)’ have lower frequencies.
# Map the ACTIV1 activity codes to their descriptions and plot how often
# each activity occurs among the walking trips.
# NOTE(review): the label for code 13 contains the typo 'worhship'; it is
# kept verbatim here because it is the text rendered on the chart.
activity_dict = {
    1: 'Home activities not related to work, school, or online',
    2: 'Homework, class related assignments or attended an online course',
    3: 'Attended classes',
    4: 'Attended other school activities (performances, meetings, clubs)',
    5: 'Work for pay',
    6: 'Personal business (banking or ATM, salon, library)',
    7: 'Online personal business (banking, e-mail, etc.)',
    8: 'Everyday shopping (grocery, drug store, gas, etc.)',
    9: 'Major shopping (appliances, cars, home furnishings, clothes, etc.)',
    10: 'Online shopping for products, services or goods',
    11: 'Eat out (restaurant, drive-thru, etc.)',
    12: 'Social (visit friends, relatives, etc.)',
    13: 'Social/community/religious (meetings, worhship, wedding, funeral, etc.)',
    14: 'Recreation- active participation (sports, exercise, walk the dog, etc.)',
    15: 'Recreation-watch/observe (movies, concert, sports event, etc.)',
    16: 'Medical (medical appointment, medical procedure, etc.)',
    17: 'Pick up passenger',
    18: 'Drop off passenger',
    19: 'Change type of transportation/transfer',
    20: 'Accompany household member',
    96: 'Other activity',
}
walk['ACTIV1'] = walk['ACTIV1'].replace(activity_dict)

# value_counts() returns the activities ordered from most to least frequent.
activity_counts = walk['ACTIV1'].value_counts()

plt.figure(figsize=(10, 8))
sns.barplot(y=activity_counts.index, x=activity_counts.values)
plt.title('Frequency of Walk Activity Type')
plt.xlabel('Count')
plt.ylabel('Activity')

# Annotate every bar with its count, placed at the end of the bar.
for bar_position, bar_count in enumerate(activity_counts.values):
    plt.text(bar_count, bar_position, f'{bar_count}')
plt.show()
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\28425172.py:24: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
walk['ACTIV1'] = walk['ACTIV1'].replace(activity_dict)

1.2 Who Prefers to Walk
Then, the following analysis of Philadelphia’s pedestrian demographics in 2012 reveals a nuanced portrait influenced by various socioeconomic factors. The data on total vehicles indicates that individuals with no vehicles in their household are the most likely to walk, which is consistent across all income brackets except for the highest and lowest, where walking is less prevalent. Income distribution among pedestrians shows a fairly even spread, with slight peaks in the middle-income ranges, suggesting that walking is not exclusively linked to any single income level.
Gender distribution shows a modest preference for walking among females compared to males. Age-wise, the 25-74 age group is the most inclined to walk, with a notable peak in the 55 to 64 bracket, highlighting the potential for walking as a consistent transportation choice across a broad adult age spectrum.
Racial demographics indicate that White individuals and those identifying as Asian are more represented among walkers than other races, with Black/African American and Hispanic or Latino individuals walking less by comparison. Education level data shows a clear trend: individuals with higher educational attainment, specifically those with a bachelor’s or graduate degree, are more likely to walk than those with lower levels of education or some college credit.
In summary, the typical pedestrian in Philadelphia in 2012 would likely be a person without a vehicle in their household, belonging to the middle-income bracket, and leaning slightly towards the female demographic. Age-wise, adults, particularly those between 25 to 74 years old, with a concentration around late 50s, are the most common walkers. A higher educational background correlates positively with the likelihood of walking, while White and Asian residents tend to walk more than other racial groups. These insights draw a picture of pedestrians in Philadelphia as a diverse group with a significant representation among middle-aged, educated individuals, suggesting that walking is a transportation mode that cuts across various socioeconomic layers, albeit with certain demographic tendencies.
import pandas as pd
import matplotlib.pyplot as plt

# Load the household and person tables; they carry the socioeconomic
# attributes that are attached to each walking trip below.
hh = pd.read_excel('data/1_Household_Public.xlsx')
per = pd.read_excel('data/2_Person_Public.xlsx')

# Left joins keep every walking trip even when no household/person row matches.
walk = walk.merge(hh[['HH_ID', 'TOT_VEH', 'INCOME']], on='HH_ID', how='left')
walk = walk.merge(per[['PERSON_ID', 'GEND', 'AGECAT', 'RACE', 'EDUCA']], on='PERSON_ID', how='left')

# Create mappings for the variables hh and person
gend_mapping = {1: 'Male', 2: 'Female', 99: 'Refused'}
agecat_mapping = {
    1: '5 or younger', 2: '6 to 12', 3: '13 to 15', 4: '16 to 17',
    5: '18 to 24', 6: '25 to 34', 7: '35 to 44', 8: '45 to 54',
    9: '55 to 64', 10: '65 to 74', 11: '75 to 85', 12: '86 and over',
    98: "Don't know", 99: 'Refused'
}
race_mapping = {
    1: 'White/Caucasian', 2: 'Black/African American', 3: 'Hispanic or Latino',
    4: 'American Indian or Alaska Native', 5: 'Asian', 6: 'Native Hawaiian or Pacific Islander',
    97: 'Other, Specify', 98: "Don't Know", 99: 'Refused', 100: 'Multi-race'
}
educa_mapping = {
    1: 'Not a high school graduate', 2: 'High school graduate', 3: 'Some college credit',
    4: 'Associate or technical school degree', 5: "Bachelor's or undergraduate degree",
    6: 'Graduate degree', 97: 'Other', 98: "Don't know", 99: 'Refused'
}
income_mapping = {
    1: '$0 to $9,999', 2: '$10,000 to $24,999', 3: '$25,000 to $34,999', 4: '$35,000 to $49,999',
    5: '$50,000 to $74,999', 6: '$75,000 to $99,999', 7: '$100,000 to $149,999',
    8: '$150,000 to $199,999', 9: '$200,000 to $249,999', 10: '$250,000 or more',
    98: "Don't know", 99: 'Refused'
}

# Series.map() turns every code that is missing from its mapping into NaN.
# NOTE(review): the preview of `walk` shows RACE as NaN after this step for
# the first rows - verify how RACE is coded in the person file.
walk['GEND'] = walk['GEND'].map(gend_mapping)
walk['AGECAT'] = walk['AGECAT'].map(agecat_mapping)
walk['RACE'] = walk['RACE'].map(race_mapping)
walk['EDUCA'] = walk['EDUCA'].map(educa_mapping)
walk['INCOME'] = walk['INCOME'].map(income_mapping)


def _ordered_counts(labels, code_to_label):
    """Count the labels and order the result by survey code.

    Sorting the label strings alphabetically scrambles ordinal categories
    (e.g. '$100,000 to $149,999' lands before '$25,000 to $34,999'), so the
    order of ``code_to_label`` is used instead. Labels that never occur are
    left out, exactly as ``value_counts()`` leaves them out.
    """
    counts = labels.value_counts()
    present_in_code_order = [
        label for label in code_to_label.values() if label in counts.index
    ]
    return counts.reindex(present_in_code_order)


fig, axs = plt.subplots(2, 3, figsize=(15, 10))
# Numeric and nominal variables keep their natural/alphabetical order ...
walk['TOT_VEH'].value_counts().sort_index().plot(kind='bar', ax=axs[0, 0], title='Total Vehicles')
walk['GEND'].value_counts().sort_index().plot(kind='bar', ax=axs[0, 2], title='Gender')
walk['RACE'].value_counts().sort_index().plot(kind='bar', ax=axs[1, 1], title='Race')
# ... while ordinal variables are shown from the lowest to the highest bracket.
_ordered_counts(walk['INCOME'], income_mapping).plot(kind='bar', ax=axs[0, 1], title='Income')
_ordered_counts(walk['AGECAT'], agecat_mapping).plot(kind='bar', ax=axs[1, 0], title='Age Category')
_ordered_counts(walk['EDUCA'], educa_mapping).plot(kind='bar', ax=axs[1, 2], title='Education')
plt.tight_layout()
plt.show()
In conclusion, the multifaceted examination of walking trips in Philadelphia in 2012 reveals that walking is a mode of transportation embraced across a spectrum of socioeconomic backgrounds, with particular tendencies shaped by the absence of vehicles, middle-income brackets, and higher educational attainments. Notably, the proclivity to walk is slightly more common among females and notably prevalent in the 25-74 age demographic, with a surprising peak among those in their late fifties. Additionally, while White and Asian residents are more represented among pedestrians, the data indicates that walking is a universally adopted practice with a presence in every income category and demographic segment. These findings reflect the inherent complexity of pedestrian movement patterns and suggest that walking, as an integral element of urban life, is influenced by a confluence of personal, economic, and cultural factors that define the pedestrian landscape of Philadelphia.
2. Where Do People Walk in Philadelphia in 2012
In the second section of my analysis, I turn my focus to the spatial dynamics of pedestrian movement within Philadelphia, unraveling the patterns of where residents choose to walk. Through a series of flow maps and heat maps, I discern the urban fabric’s influence on pedestrian behavior. The central district emerges as the epicenter of pedestrian activity, with a vibrant flux of walkers traversing its streets, likely due to commercial, cultural, and civic amenities. Adjacent areas such as the lower north, south, University southwest, and west districts also exhibit significant pedestrian traffic, albeit less densely clustered than the central district. In stark contrast, the lower southwest and lower south districts present as pedestrian deserts, areas where foot traffic is markedly sparse. These regions may lack pedestrian-friendly infrastructure or destinations that encourage walking. The overlaid heat maps, distinguishing between origins and destinations, further underscore these patterns, revealing hotspots of pedestrian origination and culmination. When combined, these visual tools paint a detailed picture of walking patterns across the city, highlighting areas of high pedestrian engagement and pinpointing zones that could benefit from urban design interventions to enhance walkability and connectivity.
# load Census Tract
import pygris

# Identifiers passed to pygris: state '42' and county '101' (the names mark
# them as Pennsylvania and Philadelphia).
pa_state_code, philly_county_code = '42', '101'

# Request the 2012 census tracts for that state/county pair.
philly_tract = pygris.tracts(
    year=2012,
    state=pa_state_code,
    county=philly_county_code,
)
philly_tract.head(5)
| STATEFP | COUNTYFP | TRACTCE | GEOID | NAME | NAMELSAD | MTFCC | FUNCSTAT | ALAND | AWATER | INTPTLAT | INTPTLON | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1783 | 42 | 101 | 038100 | 42101038100 | 381 | Census Tract 381 | G5020 | S | 3335030 | 1629584 | +40.0189334 | -075.0399564 | POLYGON ((-75.06785 40.01121, -75.06752 40.011... |
| 1784 | 42 | 101 | 038400 | 42101038400 | 384 | Census Tract 384 | G5020 | S | 2546786 | 0 | +40.0714163 | -075.2332568 | POLYGON ((-75.24321 40.07356, -75.24313 40.073... |
| 1785 | 42 | 101 | 038600 | 42101038600 | 386 | Census Tract 386 | G5020 | S | 3304398 | 32066 | +40.0582762 | -075.2110913 | POLYGON ((-75.22199 40.07306, -75.22128 40.073... |
| 1786 | 42 | 101 | 038500 | 42101038500 | 385 | Census Tract 385 | G5020 | S | 1338942 | 2576 | +40.0757528 | -075.2146234 | POLYGON ((-75.22784 40.08423, -75.22774 40.084... |
| 1787 | 42 | 101 | 038700 | 42101038700 | 387 | Census Tract 387 | G5020 | S | 2053212 | 22241 | +40.0833122 | -075.2130770 | POLYGON ((-75.22977 40.08676, -75.22961 40.086... |
philly_tract = philly_tract.to_crs(epsg=4326)

# Calculate the tract centroids once, in a projected CRS, and convert the
# resulting points back to latitude/longitude. Taking `.centroid` directly on
# EPSG:4326 geometries is what triggers geopandas' "Geometry is in a
# geographic CRS" warning, and the original recomputed it three times.
# EPSG:2272 is NAD83 / Pennsylvania South (ftUS), the State Plane zone that
# covers Philadelphia.
tract_centroids = (
    philly_tract.geometry
    .to_crs(epsg=2272)
    .centroid
    .to_crs(epsg=4326)
)
philly_tract['centroid'] = tract_centroids
philly_tract['centroid_lat'] = tract_centroids.y
philly_tract['centroid_lon'] = tract_centroids.x
philly_tract.head()
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\3754762874.py:4: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
philly_tract['centroid'] = philly_tract.geometry.centroid
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\3754762874.py:5: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
philly_tract['centroid_lat'] = philly_tract.centroid.y
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\3754762874.py:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
philly_tract['centroid_lon'] = philly_tract.centroid.x
| STATEFP | COUNTYFP | TRACTCE | GEOID | NAME | NAMELSAD | MTFCC | FUNCSTAT | ALAND | AWATER | INTPTLAT | INTPTLON | geometry | centroid | centroid_lat | centroid_lon | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1783 | 42 | 101 | 038100 | 42101038100 | 381 | Census Tract 381 | G5020 | S | 3335030 | 1629584 | +40.0189334 | -075.0399564 | POLYGON ((-75.06785 40.01121, -75.06752 40.011... | POINT (-75.03963 40.01770) | 40.017698 | -75.039634 |
| 1784 | 42 | 101 | 038400 | 42101038400 | 384 | Census Tract 384 | G5020 | S | 2546786 | 0 | +40.0714163 | -075.2332568 | POLYGON ((-75.24321 40.07356, -75.24313 40.073... | POINT (-75.23326 40.07142) | 40.071418 | -75.233255 |
| 1785 | 42 | 101 | 038600 | 42101038600 | 386 | Census Tract 386 | G5020 | S | 3304398 | 32066 | +40.0582762 | -075.2110913 | POLYGON ((-75.22199 40.07306, -75.22128 40.073... | POINT (-75.21027 40.05805) | 40.058053 | -75.210272 |
| 1786 | 42 | 101 | 038500 | 42101038500 | 385 | Census Tract 385 | G5020 | S | 1338942 | 2576 | +40.0757528 | -075.2146234 | POLYGON ((-75.22784 40.08423, -75.22773 40.084... | POINT (-75.21369 40.07591) | 40.075907 | -75.213688 |
| 1787 | 42 | 101 | 038700 | 42101038700 | 387 | Census Tract 387 | G5020 | S | 2053212 | 22241 | +40.0833122 | -075.2130770 | POLYGON ((-75.22977 40.08676, -75.22961 40.086... | POINT (-75.21434 40.08330) | 40.083299 | -75.214342 |
# Make the join key on the tract side a plain string.
philly_tract['TRACTCE'] = philly_tract['TRACTCE'].astype(str)

# only keep the strings for tract: characters 5..10 of the stringified ID,
# i.e. the six digits after the 2-digit state and 3-digit county prefix
# (e.g. '42101000804.0' -> '000804'), matching the TRACTCE format.
for tract_col in ('O_TRACT', 'D_TRACT'):
    walk[tract_col] = walk[tract_col].astype(str).str[5:11]
walk.head()
| ID | HH_ID | PERSON_NUM | PERSON_ID | HH_WEIGHT | P_WEIGHT | TOUR_NUM | WKSUB_NUM | WKSUB_ID | TRIP_NUM | ... | Survey_TravTime | Model_TravTime | Model_TravDist | CompositeWeight | TOT_VEH | INCOME | GEND | AGECAT | RACE | EDUCA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 13 | 100206 | 1 | 10020601 | 204.106167 | 281.143358 | 1.0 | NaN | 1.002060e+11 | 3 | ... | 15.0 | 16.783789 | 0.867162 | 328.937729 | 2 | $50,000 to $74,999 | Male | 45 to 54 | NaN | Bachelor's or undergraduate degree |
| 1 | 14 | 100206 | 1 | 10020601 | 204.106167 | 281.143358 | 1.0 | NaN | 1.002060e+11 | 4 | ... | 11.0 | 12.350352 | 0.638102 | 328.937729 | 2 | $50,000 to $74,999 | Male | 45 to 54 | NaN | Bachelor's or undergraduate degree |
| 2 | 314 | 102372 | 1 | 10237201 | 209.139870 | 218.215946 | 1.0 | NaN | 1.023720e+11 | 4 | ... | 9.0 | NaN | NaN | 246.584019 | 2 | Refused | Male | 35 to 44 | NaN | Graduate degree |
| 3 | 315 | 102372 | 1 | 10237201 | 209.139870 | 218.215946 | 1.0 | NaN | 1.023720e+11 | 5 | ... | 10.0 | 11.381527 | 0.588046 | 246.584019 | 2 | Refused | Male | 35 to 44 | NaN | Graduate degree |
| 4 | 1009 | 109412 | 1 | 10941201 | 193.156252 | 82.416676 | 1.0 | NaN | 1.094120e+11 | 3 | ... | 10.0 | 16.072276 | 0.830401 | 93.130844 | 2 | Refused | Female | 65 to 74 | NaN | Graduate degree |
5 rows × 88 columns
def _tract_centroid_lookup(prefix):
    """Return tract code and centroid coordinates, renamed with `prefix`.

    The columns become '<prefix>_tra', '<prefix>_lat' and '<prefix>_lon'.
    """
    return philly_tract[['TRACTCE', 'centroid_lat', 'centroid_lon']].rename(
        columns={
            'centroid_lat': f'{prefix}_lat',
            'centroid_lon': f'{prefix}_lon',
            'TRACTCE': f'{prefix}_tra',
        }
    )


# Merge latitude and longitude for origins first, then for destinations.
# Left joins keep trips whose tract code has no match (coordinates stay NaN).
for tract_col, prefix in (('O_TRACT', 'origin'), ('D_TRACT', 'destination')):
    walk = walk.merge(
        _tract_centroid_lookup(prefix),
        how='left', left_on=tract_col, right_on=f'{prefix}_tra',
    )
walk.head()
| ID | HH_ID | PERSON_NUM | PERSON_ID | HH_WEIGHT | P_WEIGHT | TOUR_NUM | WKSUB_NUM | WKSUB_ID | TRIP_NUM | ... | GEND | AGECAT | RACE | EDUCA | origin_tra | origin_lat | origin_lon | destination_tra | destination_lat | destination_lon | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 13 | 100206 | 1 | 10020601 | 204.106167 | 281.143358 | 1.0 | NaN | 1.002060e+11 | 3 | ... | Male | 45 to 54 | NaN | Bachelor's or undergraduate degree | 000804 | 39.948566 | -75.167755 | 000300 | 39.957150 | -75.171232 |
| 1 | 14 | 100206 | 1 | 10020601 | 204.106167 | 281.143358 | 1.0 | NaN | 1.002060e+11 | 4 | ... | Male | 45 to 54 | NaN | Bachelor's or undergraduate degree | 000300 | 39.957150 | -75.171232 | 000804 | 39.948566 | -75.167755 |
| 2 | 314 | 102372 | 1 | 10237201 | 209.139870 | 218.215946 | 1.0 | NaN | 1.023720e+11 | 4 | ... | Male | 35 to 44 | NaN | Graduate degree | 001002 | 39.945006 | -75.146618 | 001002 | 39.945006 | -75.146618 |
| 3 | 315 | 102372 | 1 | 10237201 | 209.139870 | 218.215946 | 1.0 | NaN | 1.023720e+11 | 5 | ... | Male | 35 to 44 | NaN | Graduate degree | 001002 | 39.945006 | -75.146618 | 000902 | 39.947161 | -75.156637 |
| 4 | 1009 | 109412 | 1 | 10941201 | 193.156252 | 82.416676 | 1.0 | NaN | 1.094120e+11 | 3 | ... | Female | 65 to 74 | NaN | Graduate degree | 000500 | 39.951955 | -75.158176 | 000600 | 39.949386 | -75.158334 |
5 rows × 94 columns
import folium

# Centre the map on the mean of all origin and destination coordinates.
average_lat = walk[['origin_lat', 'destination_lat']].stack().mean()
average_lon = walk[['origin_lon', 'destination_lon']].stack().mean()
od = folium.Map(location=[average_lat, average_lon], zoom_start=12,
                tiles='CartoDB positron')

# For every trip draw the origin (sky blue), the destination (pink) and,
# when both ends are known, a thin line connecting them.
for _, trip_row in walk.iterrows():
    origin = [trip_row['origin_lat'], trip_row['origin_lon']]
    destination = [trip_row['destination_lat'], trip_row['destination_lon']]
    has_origin = pd.notnull(origin[0]) and pd.notnull(origin[1])
    has_destination = pd.notnull(destination[0]) and pd.notnull(destination[1])

    if has_origin:
        folium.CircleMarker(
            location=origin,
            radius=3,
            color='skyblue',
            fill=True,
            fill_color='skyblue'
        ).add_to(od)
    if has_destination:
        folium.CircleMarker(
            location=destination,
            radius=3,
            color='pink',
            fill=True,
            fill_color='pink'
        ).add_to(od)
    if has_origin and has_destination:
        folium.PolyLine(
            locations=[origin, destination],
            color='lavender',
            weight=1
        ).add_to(od)
# Bare expression: in a notebook this renders the OD map as the cell output.
od
# Load the district boundaries file and put it in EPSG:4326, the CRS used
# for the tract centroids.
districts_gdf = gpd.read_file('data/Planning_Districts.geojson').to_crs(epsg=4326)
# plot the heatmap with origin
from folium.plugins import HeatMap

# One heat-map point per trip that has an origin centroid.
heatmap_data = walk[['origin_lat', 'origin_lon']].dropna().values.tolist()
o_des = folium.Map(location=[average_lat, average_lon], zoom_start=12, tiles='CartoDB positron')
HeatMap(heatmap_data).add_to(o_des)

# Outline every planning district (simplified geometry, no fill) on top.
for district_shape in districts_gdf['geometry']:
    simplified_outline = gpd.GeoSeries(district_shape).simplify(tolerance=0.001)
    folium.GeoJson(
        data=simplified_outline.to_json(),
        style_function=lambda feature: {'fillColor': 'transparent', 'color': 'black'},
    ).add_to(o_des)

# Bare expression: in a notebook this renders the origin heat map.
o_des
# plot the heatmap with destination
from folium.plugins import HeatMap

# One heat-map point per trip that has a destination centroid.
heatmap_data2 = walk[['destination_lat', 'destination_lon']].dropna().values.tolist()
d_des = folium.Map(location=[average_lat, average_lon], zoom_start=12, tiles='CartoDB positron')
HeatMap(heatmap_data2).add_to(d_des)

# Outline every planning district (simplified geometry, no fill) on top.
for district_shape in districts_gdf['geometry']:
    simplified_outline = gpd.GeoSeries(district_shape).simplify(tolerance=0.001)
    folium.GeoJson(
        data=simplified_outline.to_json(),
        style_function=lambda feature: {'fillColor': 'transparent', 'color': 'black'},
    ).add_to(d_des)
# Bare expression: in a notebook this renders the destination heat map.
# It was fused with the following import ("d_desfrom ..."), a syntax error.
d_des
from folium.plugins import HeatMap

# Point lists for the two layers; trips whose tract had no matching centroid
# (NaN coordinates) are dropped.
heatmap_data_origins = walk[['origin_lat', 'origin_lon']].dropna().values.tolist()
heatmap_data_destinations = walk[['destination_lat', 'destination_lon']].dropna().values.tolist()

# Create a Folium map centered around the average coordinates
heat_combined = folium.Map(location=[average_lat, average_lon], zoom_start=12, tiles='CartoDB positron')

# Add the origin heatmap layer with one color gradient
HeatMap(heatmap_data_origins, radius=7, blur=15, gradient={0.2: 'blue', 0.4: 'cyan', 0.6: 'lime', 0.8: 'yellow'}).add_to(heat_combined)

# Add the destination heatmap layer with another color gradient
HeatMap(heatmap_data_destinations, radius=7, blur=15, gradient={0.2: 'purple', 0.4: 'violet', 0.6: 'magenta', 0.8: 'pink'}).add_to(heat_combined)

# Overlay the district boundaries (simplified geometry, thin black outline)
for _, row in districts_gdf.iterrows():
    sim_geo = gpd.GeoSeries(row['geometry']).simplify(tolerance=0.001)
    geo_j = sim_geo.to_json()
    geo_j = folium.GeoJson(data=geo_j,
                           style_function=lambda x: {'fillColor': 'transparent', 'color': 'black', 'weight': 1})
    geo_j.add_to(heat_combined)
heat_combined
3. Explore Center City as the Pedestrian Busy District
3.1 Why and Who Walk in Center City
Having identified Center City as the busiest pedestrian district, in this section I look specifically at the walking trips and pedestrians in Center City.
In my exploration of pedestrian activities within Philadelphia’s central district, I identified distinctive patterns of movement. The data indicated a high frequency of ‘transfer’ activities, which suggests that the central district serves not only as a destination but also as a pivotal hub for pedestrian movement through the city. This is reflective of the dense network of public transit and the district’s role as a connector for various parts of the city. Among other prominent activities, ‘work for pay’ and ‘eat out (restaurant, drive-thru, etc.)’ were also significant, highlighting the central district’s commercial and social importance. Surprisingly, ‘home activities’ were the most frequent type of walk, which may be attributed to the residential pockets within the central district or the tendency of individuals to categorize errands close to home as ‘home activities’.
import geopandas as gpd
from shapely.geometry import Point
# Keep only the walk trips whose origin point lies inside the Central district.
# Trip origins become point geometries tagged with the districts layer's CRS.
# The CRS is assigned, not reprojected.
# NOTE(review): this assumes origin_lon/origin_lat are already expressed in
# the districts layer's CRS - confirm.
walk_gdf = gpd.GeoDataFrame(
    walk,
    geometry=gpd.points_from_xy(walk.origin_lon, walk.origin_lat),
    crs=districts_gdf.crs,
)

# .squeeze() collapses the one-row selection to a single geometry.
# NOTE(review): if 'Central' ever matched several rows this would stay a
# GeoSeries instead - confirm DIST_NAME is unique.
central_polygon = districts_gdf.loc[districts_gdf['DIST_NAME'] == 'Central', 'geometry'].squeeze()

# .copy() turns the boolean-mask subset into an independent frame, so later
# column assignments on walk_in_central do not raise SettingWithCopyWarning.
walk_in_central = walk_gdf[walk_gdf.geometry.within(central_polygon)].copy()
walk_in_central.head()
|  | ID | HH_ID | PERSON_NUM | PERSON_ID | HH_WEIGHT | P_WEIGHT | TOUR_NUM | WKSUB_NUM | WKSUB_ID | TRIP_NUM | ... | AGECAT | RACE | EDUCA | origin_tra | origin_lat | origin_lon | destination_tra | destination_lat | destination_lon | geometry |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 13 | 100206 | 1 | 10020601 | 204.106167 | 281.143358 | 1.0 | NaN | 1.002060e+11 | 3 | ... | 45 to 54 | NaN | Bachelor's or undergraduate degree | 000804 | 39.948566 | -75.167755 | 000300 | 39.957150 | -75.171232 | POINT (-75.16775 39.94857) |
| 1 | 14 | 100206 | 1 | 10020601 | 204.106167 | 281.143358 | 1.0 | NaN | 1.002060e+11 | 4 | ... | 45 to 54 | NaN | Bachelor's or undergraduate degree | 000300 | 39.957150 | -75.171232 | 000804 | 39.948566 | -75.167755 | POINT (-75.17123 39.95715) |
| 2 | 314 | 102372 | 1 | 10237201 | 209.139870 | 218.215946 | 1.0 | NaN | 1.023720e+11 | 4 | ... | 35 to 44 | NaN | Graduate degree | 001002 | 39.945006 | -75.146618 | 001002 | 39.945006 | -75.146618 | POINT (-75.14662 39.94501) |
| 3 | 315 | 102372 | 1 | 10237201 | 209.139870 | 218.215946 | 1.0 | NaN | 1.023720e+11 | 5 | ... | 35 to 44 | NaN | Graduate degree | 001002 | 39.945006 | -75.146618 | 000902 | 39.947161 | -75.156637 | POINT (-75.14662 39.94501) |
| 4 | 1009 | 109412 | 1 | 10941201 | 193.156252 | 82.416676 | 1.0 | NaN | 1.094120e+11 | 3 | ... | 65 to 74 | NaN | Graduate degree | 000500 | 39.951955 | -75.158176 | 000600 | 39.949386 | -75.158334 | POINT (-75.15818 39.95195) |
5 rows × 95 columns
# Lookup table from the survey's numeric activity codes to readable labels.
# Its insertion order also fixes the left-to-right order of the bars below.
# NOTE(review): 'worhship' (code 13) is a typo; it is kept byte-identical
# because the label is a runtime string - confirm before correcting it.
activity_mapping = {
    1: 'Home activities not related to work, school, or online',
    2: 'Homework, class related assignments or attended an online course',
    3: 'Attended classes',
    4: 'Attended other school activities (performances, meetings, clubs)',
    5: 'Work for pay',
    6: 'Personal business (banking or ATM, salon, library)',
    7: 'Online personal business (banking, e-mail, etc.)',
    8: 'Everyday shopping (grocery, drug store, gas, etc.)',
    9: 'Major shopping (appliances, cars, home furnishings, clothes, etc.)',
    10: 'Online shopping for products, services or goods',
    11: 'Eat out (restaurant, drive-thru, etc.)',
    12: 'Social (visit friends, relatives, etc.)',
    13: 'Social/community/religious (meetings, worhship, wedding, funeral, etc.)',
    14: 'Recreation- active participation (sports, exercise, walk the dog, etc.)',
    15: 'Recreation-watch/observe (movies, concert, sports event, etc.)',
    16: 'Medical (medical appointment, medical procedure, etc.)',
    17: 'Pick up passenger',
    18: 'Drop off passenger',
    19: 'Change type of transportation/transfer',
    20: 'Accompany household member',
    96: 'Other activity',
}

# walk_in_central was produced by boolean indexing; copy it first so the
# column assignment below does not raise pandas' SettingWithCopyWarning.
walk_in_central = walk_in_central.copy()
walk_in_central['ACTIV1'] = walk_in_central['ACTIV1'].map(activity_mapping)

# Plotting
plt.figure(figsize=(10, 8))
activity_counts = walk_in_central['ACTIV1'].value_counts()
# Reorder the counts to follow the mapping. fill_value=0 gives activities with
# no trips an integer zero; without it reindex inserts NaN, which turns every
# count into a float and prints 'nan' / '12.0' style labels on the chart.
ordered_activities = activity_counts.reindex(list(activity_mapping.values()), fill_value=0)
ordered_activities.plot(kind='bar')
plt.title('Frequency of Walk Activity Type')
plt.xlabel('Activity')
plt.ylabel('Count')
plt.xticks(rotation=90)
# Write each count just above its bar, centred on the bar.
for index, value in enumerate(ordered_activities.values):
    plt.text(index, value, f'{value}', ha='center', va='bottom')
plt.tight_layout()
plt.show()
D:\Anaconda\Install\envs\musa-550-fall-2023\lib\site-packages\geopandas\geodataframe.py:1538: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
super().__setitem__(key, value)

Upon reprofiling the pedestrian demographic within the central district, I found that the socioeconomic characteristics closely mirrored those observed citywide. Vehicle ownership, income, age, race, gender, and education level demonstrated similar trends, with no significant deviations. This lack of disparity suggests a consistent pedestrian demographic across the city, irrespective of the more intense walking activity in the central district. Most walkers belonged to households without a vehicle, spanned across a diverse range of income levels, and were predominantly White/Caucasian with at least a graduate degree, reflecting the broader city’s pedestrian profile. However, it was interesting to note that the central district’s pedestrian population was just as diverse as the rest of Philadelphia, reinforcing the notion that walking as a mode of transport transcends socioeconomic boundaries within urban centers.
# 2x3 grid of bar charts, one per socioeconomic attribute of the walkers in
# the central district. Each panel shows category counts sorted by category.
fig, axs = plt.subplots(2, 3, figsize=(15, 10))

# (column, panel title) pairs, listed in row-major panel order.
demographic_panels = [
    ('TOT_VEH', 'Total Vehicles'),
    ('INCOME', 'Income'),
    ('GEND', 'Gender'),
    ('AGECAT', 'Age Category'),
    ('RACE', 'Race'),
    ('EDUCA', 'Education'),
]
for panel_ax, (column, panel_title) in zip(axs.flat, demographic_panels):
    category_counts = walk_in_central[column].value_counts().sort_index()
    category_counts.plot(kind='bar', ax=panel_ax, title=panel_title)

plt.tight_layout()
plt.show()
3.2 What in Center City Attracts People to Walk There
In the latter half of my analysis of Center City, I delved into the urban elements that contribute to its high pedestrian traffic. I used OpenStreetMap to overlay various components of the urban environment, revealing a dense and intricate web of walkways that form the backbone of pedestrian movement in the area. My mapping highlighted an extensive network of roads frequented by pedestrians, which are well-serviced by an array of public transit options, including numerous bus stops and metro stations strategically located throughout the district.
The presence of these amenities suggests a cityscape thoughtfully designed to encourage walking, where the convenience of public transport complements pedestrian pathways. Moreover, my data visualizations pinpointed clusters of restaurants, schools, and commercial blocks, which serve as vital nodes of activity, drawing people onto the streets. These commercial hubs are not only destinations but also waypoints that enliven the pedestrian experience with their vibrancy and accessibility.
import osmnx as ox
# Configure OSMnx through its settings module. The installed release reports
# ox.config(...) as deprecated and recommends setting these values directly.
ox.settings.use_cache = True
ox.settings.log_console = True

# Place query shared by every OpenStreetMap feature download below.
center_city_place = 'Center City,Philadelphia, PA'

# features_from_place replaces the deprecated geometries_from_place.
# Each call downloads the features matching the given OSM tags.
# For bus stops
bus_stops = ox.features_from_place(center_city_place, tags={'highway': 'bus_stop'})
# For metro stops
metro_stops = ox.features_from_place(center_city_place, tags={'railway': 'station'})
# For restaurants
restaurants = ox.features_from_place(center_city_place, tags={"amenity": ["pub", "bar", "restaurant"]})
# For schools
schools = ox.features_from_place(center_city_place, tags={'amenity': 'school'})
# For commercial blocks
commercial = ox.features_from_place(center_city_place, tags={'landuse': 'commercial'})
# For parks
parks = ox.geometries_from_place('Center City,Philadelphia, PA', tags={'leisure': 'park'})
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\2733245946.py:2: UserWarning: The `utils.config` function is deprecated and will be removed in a future release. Instead, use the `settings` module directly to configure a global setting's value. For example, `ox.settings.log_console=True`.
ox.config(use_cache=True, log_console=True)
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\2733245946.py:5: UserWarning: The `geometries` module and `geometries_from_X` functions have been renamed the `features` module and `features_from_X` functions. Use these instead. The `geometries` module and function names are deprecated and will be removed in a future release.
bus_stops = ox.geometries_from_place('Center City,Philadelphia, PA', tags={'highway': 'bus_stop'})
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\2733245946.py:8: UserWarning: The `geometries` module and `geometries_from_X` functions have been renamed the `features` module and `features_from_X` functions. Use these instead. The `geometries` module and function names are deprecated and will be removed in a future release.
metro_stops = ox.geometries_from_place('Center City,Philadelphia, PA', tags={'railway': 'station'})
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\2733245946.py:11: UserWarning: The `geometries` module and `geometries_from_X` functions have been renamed the `features` module and `features_from_X` functions. Use these instead. The `geometries` module and function names are deprecated and will be removed in a future release.
restaurants = ox.geometries_from_place('Center City,Philadelphia, PA', tags={"amenity": ["pub", "bar", "restaurant"]})
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\2733245946.py:14: UserWarning: The `geometries` module and `geometries_from_X` functions have been renamed the `features` module and `features_from_X` functions. Use these instead. The `geometries` module and function names are deprecated and will be removed in a future release.
schools = ox.geometries_from_place('Center City,Philadelphia, PA', tags={'amenity': 'school'})
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\2733245946.py:17: UserWarning: The `geometries` module and `geometries_from_X` functions have been renamed the `features` module and `features_from_X` functions. Use these instead. The `geometries` module and function names are deprecated and will be removed in a future release.
commercial = ox.geometries_from_place('Center City,Philadelphia, PA', tags={'landuse': 'commercial'})
C:\Users\gaoxi\AppData\Local\Temp\ipykernel_29064\2733245946.py:20: UserWarning: The `geometries` module and `geometries_from_X` functions have been renamed the `features` module and `features_from_X` functions. Use these instead. The `geometries` module and function names are deprecated and will be removed in a future release.
parks = ox.geometries_from_place('Center City,Philadelphia, PA', tags={'leisure': 'park'})
# Download the walkable street network around the geocoded
# "Center City, Philadelphia, PA" address.
G = ox.graph_from_address("Center City, Philadelphia, PA", network_type="walk")

# Project the graph before plotting, and hide the nodes (node_size=0) so only
# the street segments are drawn.
G_projected = ox.project_graph(G)
ox.plot_graph(G_projected, node_size=0)
(<Figure size 800x800 with 1 Axes>, <Axes: >)
# Central district outline as a one-element GeoSeries in EPSG:4326.
# NOTE(review): within this cell the outline is built but never drawn on the
# map below - confirm whether it is used later or should be overlaid here.
boundary = gpd.GeoSeries([central_polygon], crs="EPSG:4326")

# Walkable street network restricted to the Central district polygon.
G_cc = ox.graph_from_polygon(central_polygon, network_type="walk")

# Convert the graph to a GeoDataFrame of edges only (no node table).
cc_edges = ox.graph_to_gdfs(G_cc, nodes=False, edges=True)

# Interactive map of the street segments on light CartoDB tiles.
cc_edges.explore(tiles='cartodb positron')